Todos los datos se tomaron desde muestras postmortem neurotipicos.
library("GenomicRanges")
library("biomaRt")
library("WGCNA")
library("reshape")
library("corrplot")
library("gProfileR")
library("tidyverse")
library("ggpubr")
credSNP <- vroom::vroom("datos_newAnalysis/AD_complete.csv")
## Rows: 477
## Columns: 71
## Delimiter: ","
## chr [ 5]: SNP, Ancestral, Minor, CONTEXT, Class
## dbl [66]: Global, Global_MAF, ACB, ACB_MAF, AFR, AFR_MAF, AMR, AMR_MAF, ASW, ASW_MAF, BEB,...
##
## Use `spec()` to retrieve the guessed column specification
## Pass a specification to the `col_types` argument to quiet this message
credranges <- GRanges(credSNP$seq_region_name, IRanges(credSNP$start, credSNP$start),
rsid = credSNP$SNP, Context=credSNP$CONTEXT)
credranges
## GRanges object with 477 ranges and 2 metadata columns:
## seqnames ranges strand | rsid Context
## <Rle> <IRanges> <Rle> | <character> <character>
## [1] 8 94979792 * | rs10098778 intron_variant
## [2] 8 27354759 * | rs10109834 intron_variant
## [3] 19 44903416 * | rs10119 3_prime_UTR_variant
## [4] 9 2844360 * | rs10122329 regulatory_region_va..
## [5] 2 233575317 * | rs10182651 intergenic_variant
## ... ... ... ... . ... ...
## [473] 19 3405594 * | rs9749589 intron_variant
## [474] 11 60156035 * | rs983392 intergenic_variant
## [475] 3 190951729 * | rs9877502 intergenic_variant
## [476] 17 75022679 * | rs9899728 intergenic_variant
## [477] 17 5081152 * | rs9916042 intron_variant
## -------
## seqinfo: 22 sequences from an unspecified genome; no seqlengths
Primero se cargan las regiones promotoras y exonicas para generar un objeto GRanges
exon <- read.table("data/Gencode19_exon.bed")
exonranges <- GRanges(exon[,1],IRanges(exon[,2], exon[,3]), gene = exon[,4])
promoter <- read.table("data/Gencode19_promoter.bed")
promoterranges <- GRanges(promoter[,1], IRanges(promoter[,2], promoter[,3]), gene = promoter[,4])
Se hace un sobrelape de los objetos GRanges de SNPs con exones
olap <- findOverlaps(credranges, exonranges)
credexon <- credranges[queryHits(olap)]
mcols(credexon) <- cbind(mcols(credexon), mcols(exonranges[subjectHits(olap)]))
Se hace un sobrelape de los objetos GRanges de SNPs con regiones promotoras
olap <- findOverlaps(credranges, promoterranges)
credpromoter <- credranges[queryHits(olap)]
mcols(credpromoter) <- cbind(mcols(credpromoter), mcols(promoterranges[subjectHits(olap)]))
Se unen los SNPs a sus supiestos genes diana usando interacciones de cromatina. Se cargan datos Hi-C y se genera un objeto GRange.
hic <- read.table("data/Promoter-anchored_chromatin_loops.bed", skip=1)
colnames(hic) <- c("chr", "TSS_start", "TSS_end", "Enhancer_start", "Enhancer_end")
hicranges <- GRanges(hic$chr, IRanges(hic$TSS_start, hic$TSS_end), enhancer=hic$Enhancer_start)
olap <- findOverlaps(hicranges, promoterranges)
hicpromoter <- hicranges[queryHits(olap)]
mcols(hicpromoter) <- cbind(mcols(hicpromoter), mcols(promoterranges[subjectHits(olap)]))
hicenhancer <- GRanges(seqnames(hicpromoter), IRanges(hicpromoter$enhancer, hicpromoter$enhancer+10000),
gene=hicpromoter$gene)
Se sobrelapan los SNNps con el objeto GRanges de Hi-C
olap <- findOverlaps(credranges, hicenhancer)
credhic <- credranges[queryHits(olap)]
mcols(credhic) <- cbind(mcols(credhic), mcols(hicenhancer[subjectHits(olap)]))
Se seleccionan los genes que resultaron del mapeo anterior. Los genes candidatos resultates para AD son:
ADgenes <- Reduce(union, list(credhic$gene, credexon$gene, credpromoter$gene))
SNPs <- c(credhic$rsid, credexon$rsid, credpromoter$rsid)
SNPs <- unique(SNPs)
Convertir Ensembl Gene ID a HGNC symbol
load("data/geneAnno2.rda")
ADhgnc <- geneAnno1[match(ADgenes, geneAnno1$ensembl_gene_id), "hgnc_symbol"]
ADhgnc <- ADhgnc[ADhgnc!=""]
Procesamiento de datos de expresion y meta data
datExpr <- read.csv("data/gene_array_matrix_csv/expression_matrix.csv", header = FALSE)
datExpr <- datExpr[,-1]
datMeta <- read.csv("data/gene_array_matrix_csv/columns_metadata.csv")
datProbes <- read.csv("data/gene_array_matrix_csv/rows_metadata.csv")
datExpr <- datExpr[datProbes$ensembl_gene_id!="",]
datProbes <- datProbes[datProbes$ensembl_gene_id!="",]
datExpr.cr <- collapseRows(datExpr, rowGroup = datProbes$ensembl_gene_id,
rowID= rownames(datExpr))
datExpr <- datExpr.cr$datETcollapsed
gename <- data.frame(datExpr.cr$group2row)
rownames(datExpr) <- gename$group
datExpr[1:5,1:7]
## V2 V3 V4 V5 V6 V7 V8
## ENSG00000000003 10.25030 10.70150 11.08360 10.06610 11.19010 10.44810 10.40350
## ENSG00000000005 3.51709 3.53494 3.72414 3.67140 3.62474 3.62463 3.58473
## ENSG00000000419 10.19980 10.36500 10.61390 10.15490 10.65210 9.92869 9.88633
## ENSG00000000457 5.79191 7.03140 7.18755 6.10701 6.77977 5.33604 6.05875
## ENSG00000000938 5.61206 5.53656 5.22315 5.81295 5.60542 5.66549 5.50235
Se especifican los estadios de desarrollo.
datMeta$Unit <- "Postnatal"
idx <- grep("pcw", datMeta$age)
datMeta$Unit[idx] <- "Prenatal"
idx <- grep("yrs", datMeta$age)
datMeta$Unit[idx] <- "Postnatal"
datMeta$Unit <- factor(datMeta$Unit, levels=c("Prenatal", "Postnatal"))
Se seleccionan las regiones corticales
datMeta$Region <- "SubCTX"
r <- c("A1C", "STC", "ITC", "TCx", "OFC", "DFC",
"VFC", "MFC", "M1C", "S1C", "IPC", "M1C-S1C",
"PCx", "V1C", "Ocx")
datMeta$Region[datMeta$structure_acronym %in% r] <- "CTX"
datExpr <- datExpr[,which(datMeta$Region=="CTX")]
datMeta <- datMeta[which(datMeta$Region=="CTX"),]
save(datExpr, datMeta, file="datos_newAnalysis/devExpr.rda")
Se extraen los perfiles de expresion de desarrollo de los genes de riesgo para AD Se hace un heatmap para ver la calidad de los datos
load("datos_newAnalysis/ADgenes.rda")
exprdat <- apply(datExpr[match(ADgenes, rownames(datExpr)),], 2, mean, na.rm=T)
dat <- data.frame(Region=datMeta$Region, Unit=datMeta$Unit, Expr=exprdat)
prueba <- datExpr[match(ADgenes, rownames(datExpr)),]
prueba <- na.omit(prueba)
ADhgnc <- geneAnno1[match(rownames(prueba), geneAnno1$ensembl_gene_id), "hgnc_symbol"]
ADhgnc <- ADhgnc[ADhgnc!=""]
library(RColorBrewer)
library(gplots)
mypalette <- brewer.pal(11,"RdYlBu")
morecols <- colorRampPalette(mypalette)
col.cell <- c("cyan","red")[dat$Unit]
heatmap.2(prueba,col=rev(morecols(50)),trace="none",
main="Genes de riesgo AD entre las muestras",
ColSideColors=col.cell,scale="row")
ggplot(dat,aes(x=Unit, y=Expr, fill=Unit, alpha=Unit)) +
ylab("Normalized expression") + geom_boxplot(outlier.size = NA) +
ggtitle("Expresion en cerebro: region cortical") + xlab("") +
scale_alpha_manual(values=c(0.2, 1)) + theme_classic() +
theme(legend.position="na", text = element_text(size = 20))
load("datos_newAnalysis/ADgenes.rda")
load("data/geneAnno2.rda")
targetname <- "AD"
targetgene <- ADhgnc
cellexp <- read.table("data/DER-20_Single_cell_expression_processed_TPM_backup.tsv",header=T,fill=T)
cellexp[1121,1] <- cellexp[1120,1]
cellexp <- cellexp[-1120,]
rownames(cellexp) <- cellexp[,1]
cellexp <- cellexp[,-1]
datExpr <- scale(cellexp,center=T, scale=F)
datExpr <- datExpr[,789:ncol(datExpr)]
exprdat <- apply(datExpr[match(targetgene, rownames(datExpr)),],2,mean,na.rm=T)
dat <- data.frame(Group=targetname, cell=names(exprdat), Expr=exprdat)
dat$celltype <- unlist(lapply(strsplit(dat$cell, split="[.]"),'[[',1))
dat <- dat[-grep("Ex|In",dat$celltype),]
dat$celltype <- gsub("Dev","Fetal",dat$celltype)
dat$celltype <- factor(dat$celltype, levels=c("Neurons","Astrocytes","Microglia","Endothelial",
"Oligodendrocytes","OPC","Fetal"))
ggplot(dat,aes(x=celltype, y=Expr, fill=celltype)) +
ylab("Normalized expression") + xlab("") +
geom_violin() + theme_bw() +
theme(axis.text.x=element_text(angle = 90, hjust=1),
text = element_text(size=15)) +
theme(legend.position="none") +
ggtitle(paste0("Cellular expression profiles of AD risk genes"))
library(hypeR)
library(tidyverse)
library(reactable)
genesets <- msigdb_gsets("Homo sapiens", "C2", "CP:REACTOME", clean=TRUE)
print(genesets)
## C2.CP:REACTOME v7.2.1
## 2 Ltr Circle Formation (7)
## A Tetrasaccharide Linker Sequence Is Required For Gag Synthesis (26)
## Abacavir Metabolism (5)
## Abacavir Transmembrane Transport (5)
## Abacavir Transport And Metabolism (10)
## Abc Family Proteins Mediated Transport (103)
genes <- read.table("datos_newAnalysis/ADgenes.txt")
signatures <- genes$V1
mhyp <- hypeR(signatures, genesets, test="hypergeometric", background=360000)
hyp_dots(mhyp, merge=TRUE, fdr=0.01, top=100, title="Co-expression Modules")
hyp_emap(mhyp)